{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [],
   "source": [
    "#加载飞桨、NumPy和相关类库\n",
    "import paddle\n",
    "from paddle.nn import Linear\n",
    "import paddle.nn.functional as F\n",
    "import numpy as np\n",
    "import os\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_data():\n",
    "    # 从文件导入数据\n",
    "    datafile = '../0/housing.data'\n",
    "    data = np.fromfile(datafile, sep=' ', dtype=np.float32)\n",
    "\n",
    "    # 每条数据包括14项，其中前面13项是影响因素，第14项是相应的房屋价格中位数\n",
    "    feature_names = [ 'CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', \\\n",
    "                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV' ]\n",
    "    feature_num = len(feature_names)\n",
    "\n",
    "    # 将原始数据进行Reshape，变成[N, 14]这样的形状\n",
    "    data = data.reshape([data.shape[0] // feature_num, feature_num])\n",
    "\n",
    "    # 将原数据集拆分成训练集和测试集\n",
    "    # 这里使用80%的数据做训练，20%的数据做测试\n",
    "    # 测试集和训练集必须是没有交集的\n",
    "    ratio = 0.8\n",
    "    offset = int(data.shape[0] * ratio)\n",
    "    training_data = data[:offset]\n",
    "\n",
    "    # 计算train数据集的最大值，最小值\n",
    "    maximums, minimums = training_data.max(axis=0), training_data.min(axis=0)\n",
    "    \n",
    "    # 记录数据的归一化参数，在预测时对数据做归一化\n",
    "    global max_values\n",
    "    global min_values\n",
    "   \n",
    "    max_values = maximums\n",
    "    min_values = minimums\n",
    "    \n",
    "    # 对数据进行归一化处理\n",
    "    for i in range(feature_num):\n",
    "        data[:, i] = (data[:, i] - min_values[i]) / (maximums[i] - minimums[i])\n",
    "\n",
    "    # 训练集和测试集的划分比例\n",
    "    training_data = data[:offset]\n",
    "    test_data = data[offset:]\n",
    "    return training_data, test_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Regressor(paddle.nn.Layer):\n",
    "\n",
    "    # self代表类的实例自身\n",
    "    def __init__(self):\n",
    "        # 初始化父类中的一些参数\n",
    "        super(Regressor, self).__init__()\n",
    "        \n",
    "        # 定义一层全连接层，输入维度是13，输出维度是1\n",
    "        self.fc = Linear(in_features=13, out_features=1)\n",
    "    \n",
    "    # 网络的前向计算\n",
    "    def forward(self, inputs):\n",
    "        x = self.fc(inputs)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 声明定义好的线性回归模型\n",
    "model = Regressor()\n",
    "# 开启模型训练模式\n",
    "model.train()\n",
    "# 加载数据\n",
    "training_data, test_data = load_data()\n",
    "# 定义优化算法，使用随机梯度下降SGD\n",
    "# 学习率设置为0.01\n",
    "opt = paddle.optimizer.SGD(learning_rate=0.01, parameters=model.parameters())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "epoch: 0, iter: 0, loss is: [0.07798183]\n",
      "epoch: 0, iter: 20, loss is: [0.45868373]\n",
      "epoch: 0, iter: 40, loss is: [0.05664458]\n",
      "epoch: 1, iter: 0, loss is: [0.1760987]\n",
      "epoch: 1, iter: 20, loss is: [0.06236817]\n",
      "epoch: 1, iter: 40, loss is: [0.11234251]\n",
      "epoch: 2, iter: 0, loss is: [0.1086928]\n",
      "epoch: 2, iter: 20, loss is: [0.11747213]\n",
      "epoch: 2, iter: 40, loss is: [0.11308786]\n",
      "epoch: 3, iter: 0, loss is: [0.11182571]\n",
      "epoch: 3, iter: 20, loss is: [0.05184934]\n",
      "epoch: 3, iter: 40, loss is: [0.19349143]\n",
      "epoch: 4, iter: 0, loss is: [0.1814951]\n",
      "epoch: 4, iter: 20, loss is: [0.05383878]\n",
      "epoch: 4, iter: 40, loss is: [0.18834545]\n",
      "epoch: 5, iter: 0, loss is: [0.0739925]\n",
      "epoch: 5, iter: 20, loss is: [0.12843503]\n",
      "epoch: 5, iter: 40, loss is: [0.15195669]\n",
      "epoch: 6, iter: 0, loss is: [0.06847687]\n",
      "epoch: 6, iter: 20, loss is: [0.05662089]\n",
      "epoch: 6, iter: 40, loss is: [0.06897908]\n",
      "epoch: 7, iter: 0, loss is: [0.01696825]\n",
      "epoch: 7, iter: 20, loss is: [0.06186926]\n",
      "epoch: 7, iter: 40, loss is: [0.08092441]\n",
      "epoch: 8, iter: 0, loss is: [0.04557494]\n",
      "epoch: 8, iter: 20, loss is: [0.04908922]\n",
      "epoch: 8, iter: 40, loss is: [0.02588345]\n",
      "epoch: 9, iter: 0, loss is: [0.10449976]\n",
      "epoch: 9, iter: 20, loss is: [0.04027183]\n",
      "epoch: 9, iter: 40, loss is: [0.02356497]\n"
     ]
    }
   ],
   "source": [
    "EPOCH_NUM = 10   # 设置外层循环次数\n",
    "BATCH_SIZE = 10  # 设置batch大小\n",
    "\n",
    "# 定义外层循环\n",
    "for epoch_id in range(EPOCH_NUM):\n",
    "    # 在每轮迭代开始之前，将训练数据的顺序随机的打乱\n",
    "    np.random.shuffle(training_data)\n",
    "    # 将训练数据进行拆分，每个batch包含10条数据\n",
    "    mini_batches = [training_data[k:k+BATCH_SIZE] for k in range(0, len(training_data), BATCH_SIZE)]\n",
    "    # 定义内层循环\n",
    "    for iter_id, mini_batch in enumerate(mini_batches):\n",
    "        x = np.array(mini_batch[:, :-1]) # 获得当前批次训练数据\n",
    "        y = np.array(mini_batch[:, -1:]) # 获得当前批次训练标签（真实房价）\n",
    "        # 将numpy数据转为飞桨动态图tensor的格式\n",
    "        house_features = paddle.to_tensor(x)\n",
    "        prices = paddle.to_tensor(y)\n",
    "        \n",
    "        # 前向计算\n",
    "        predicts = model(house_features)\n",
    "        \n",
    "        # 计算损失\n",
    "        loss = F.square_error_cost(predicts, label=prices)\n",
    "        avg_loss = paddle.mean(loss)\n",
    "        if iter_id%20==0:\n",
    "            print(\"epoch: {}, iter: {}, loss is: {}\".format(epoch_id, iter_id, avg_loss.numpy()))\n",
    "        \n",
    "        # 反向传播，计算每层参数的梯度值\n",
    "        avg_loss.backward()\n",
    "        # 更新参数，根据设置好的学习率迭代一步\n",
    "        opt.step()\n",
    "        # 清空梯度变量，以备下一轮计算\n",
    "        opt.clear_grad()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "模型保存成功，模型参数保存在LR_model.pdparams中\n"
     ]
    }
   ],
   "source": [
    "# 保存模型参数，文件名为LR_model.pdparams\n",
    "paddle.save(model.state_dict(), 'LR_model.pdparams')\n",
    "print(\"模型保存成功，模型参数保存在LR_model.pdparams中\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_one_example():\n",
    "    # 从上边已加载的测试集中，随机选择一条作为测试数据\n",
    "    idx = np.random.randint(0, test_data.shape[0])\n",
    "    idx = -10\n",
    "    one_data, label = test_data[idx, :-1], test_data[idx, -1]\n",
    "    # 修改该条数据shape为[1,13]\n",
    "    one_data =  one_data.reshape([1,-1])\n",
    "\n",
    "    return one_data, label"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Inference result is [[20.318707]], the corresponding label is 19.700000762939453\n"
     ]
    }
   ],
   "source": [
    "# 参数为保存模型参数的文件地址\n",
    "model_dict = paddle.load('LR_model.pdparams')\n",
    "model.load_dict(model_dict)\n",
    "model.eval()\n",
    "\n",
    "# 参数为数据集的文件地址\n",
    "one_data, label = load_one_example()\n",
    "# 将数据转为动态图的variable格式 \n",
    "one_data = paddle.to_tensor(one_data)\n",
    "predict = model(one_data)\n",
    "\n",
    "# 对结果做反归一化处理\n",
    "predict = predict * (max_values[-1] - min_values[-1]) + min_values[-1]\n",
    "# 对label数据做反归一化处理\n",
    "label = label * (max_values[-1] - min_values[-1]) + min_values[-1]\n",
    "\n",
    "print(\"Inference result is {}, the corresponding label is {}\".format(predict.numpy(), label))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
